*** BCG descriptive stats 
* Basile Grassi

****************************** 1. Preamble ******

* Start from an empty session: no data, no stored results, no macros
	clear all
	macro drop _all

* Resource settings for this run
	set matsize 11000
	set max_memory 50g

* Move to the replication package root; the data/ and BCG/ paths used below are relative to it
	cd "C:\Users\Public\Documents\BCG_DGM\BCG\replication_file_jan25_package\"


	
	
****************************************CHANGE SPECS AND OUTLIER ***************	
local specsDeta_l    _baseline
local winlevel_l    03 
********************************************************************************

**********************CHOICE of outlier treatment level
* winsorization level: digits that follow the decimal point of the tail fraction
* (03 is used further down as p(.03)); also part of the log-file name
global winlevel_gl `winlevel_l'
di "winlevel_gl=$winlevel_gl"

* trimming levels in thousandths: bottr_gl is the lower cut-off, toptr_gl = 1000 - bottr_gl
if "`winlevel_l'"=="01"{
	global toptr_gl 990
	global bottr_gl 10
}
else if "`winlevel_l'"=="015"{
	global toptr_gl 985
	global bottr_gl 15
}
else if "`winlevel_l'"=="02"{
	global toptr_gl 980
	global bottr_gl 20
}
else if "`winlevel_l'"=="025"{
	global toptr_gl 975
	global bottr_gl 25
}
else if "`winlevel_l'"=="03"{
	global toptr_gl 970
	global bottr_gl 30
}
else {
	* stop here rather than continue with empty trimming globals
	di as error "unsupported winlevel_l = `winlevel_l' (expected 01, 015, 02, 025 or 03)"
	exit 198
}

di "toptr_gl = $toptr_gl"
* label fixed: this line prints the bottom cut-off
di "bottr_gl = $bottr_gl"

* coefficient specification suffix, used in the coefficient file names and in the log-file name
global specsDeta `specsDeta_l'
di "specsDeta (global)==$specsDeta"	

di "results ending in  _$winlevel_gl$specsDeta" 


*choose the dataset

* select dataset with production data 
global dataset "ficusfare_reduced_9419_sec2_v0222"
* NOTE(review): presumably the list of 2-digit sector codes; this local is not referenced again in the visible script
local indlist 08 13 14 15 16 17 18 20 22 23 24 25 26 27 28 29 30 31 32 33 43 46 70 95

di "data=$dataset"

di "==================================="
	

********************************** Set up log file	**************************************
* close any log left open by an earlier (e.g. aborted) run, otherwise -log using- stops with an error
capture log close
log using BCG/logs/BCG_statdes_sizevol_$winlevel_gl$specsDeta.log, replace
******************************************************************************************
	
******************************************************************************************
****************************** 2. Load the data ******
******************************************************************************************
	
*select path
cd "C:\Users\Public\Documents\BCG_DGM\BCG\replication_file_jan25_package\"

*load the firm-level data:

use data/$dataset, clear 


**drop some useless variables
**done one variable at a time: a single -capture drop- over the whole list drops nothing
**(and says nothing) as soon as one of the listed variables is absent
foreach unused in acha1 acha2 acha3 acha5 acha6 cogs cogs_newdef acha1R acha2R acha3R acha5R acha6R invcorp catotal_tot {
	* exact match only, so an absent name is never resolved to a longer variable name by abbreviation
	capture confirm variable `unused', exact
	if !_rc drop `unused'
}

**destring the sector code before the merge
destring naf2d, replace

	
*Compute Markup 

	** logs of the production variables (R-suffixed inputs; presumably deflated values - confirm)
	generate s = ln(catotalR)
	generate m = ln(acha4R)	//m will be the variable input 
	generate v = ln(salR)
	generate k = ln(immocorR)
	generate o = ln(autachaR)

	** sales over expenditure on the variable input
	generate ratio = catotal/acha4 
	
	** one pass per coefficient set: FSQ (quantity) and BFSR (revenue)
	foreach est in FSQ BFSR {
		*** bring in the sector-level coefficients
		merge m:1 naf2d using "data\coefficients_byind_Python_`est'$specsDeta.dta", nogen
		
		*** tag every coefficient with the name of its set so that the two sets can coexist
		foreach coef in const_cd m_cd v_cd o_cd k_cd const_tl m_tl v_tl o_tl k_tl m2_tl mv_tl mo_tl mk_tl v2_tl vo_tl vk_tl o2_tl ok_tl k2_tl {
			rename beta_`coef' beta_`coef'_`est'
		}
		
		*** variables of the coefficient file that are not needed here
		drop N med_CD med_TL iqr_CD iqr_TL  rho_cd  rho_tl 
		
		*** translog (TL) elasticity with respect to m: varies with the firm's input levels
		generate elast_tl_s`est'_t1 = beta_m_tl_`est' + 2*beta_m2_tl_`est'*m + beta_mv_tl_`est'*v ///
			+ beta_mo_tl_`est'*o + beta_mk_tl_`est'*k
		
		*** Cobb-Douglas (CD) elasticity with respect to m: the coefficient itself
		generate elast_cd_s`est'_t1 = beta_m_cd_`est'
		
		*** markup = elasticity times the sales-to-input ratio, then its log
		generate mu_sepcal_TL_s`est'_t1 = elast_tl_s`est'_t1 * ratio 
		generate mu_sepcal_CD_s`est'_t1 = elast_cd_s`est'_t1 * ratio
		
		generate l_mu_sepcal_TL_s`est'_t1 = ln(mu_sepcal_TL_s`est'_t1) 
		generate l_mu_sepcal_CD_s`est'_t1 = ln(mu_sepcal_CD_s`est'_t1)
	}
	

	***Lerner markup (based on the net operating profit margin)
		
		** cost of capital: a constant 4% return (net of depreciation) applied to immocor
		generate r = 0.04
		generate rk = r*immocor
		
		** total cost: sal + acha4 + capital cost + autacha
		generate tc = sal + acha4 + rk + autacha
		
		** Lerner index: profit margin as a share of sales
		generate Lerner = (catotal - tc)/catotal
		
		** implied markup, 1/(1 - Lerner)
		generate mu_sepcal_Lerner_t1 = (1-Lerner)^(-1)
		
		** log of the Lerner markup
		generate l_mu_sepcal_Lerner_t1  = log(mu_sepcal_Lerner_t1)
	
	

**drop some useless variables
*capture drop ml_* ratio_* elast_*	
	


************************************************************************************************************************************************************************************
******************************************************************* 3. Selecting Final Sample and Some indicators
************************************************************************************************************************************************************************************	
	
*Keep firms with positive VA	
	* Stata orders missing values above every number, so va>0 alone also keeps missing va;
	* the explicit test restricts the sample to observed, strictly positive value added
	keep if va>0 & !missing(va)

*Keep firms with positive catotal and inputs
	* NOTE(review): the flag is not built here (its definition below is commented out);
	* it is presumably shipped with the dataset - confirm
	/*
	gen sample_firm_inputs = 0
	replace sample_firm_inputs = 1 if (catotalR>0 & salR>0 & immocorR>0 & acha4R>0 & autachaR>0)
	label var sample_firm_inputs "=1 if (catotalR>0 & salR>0 & immocorR>0 & acha4R>0 & autachaR>0)"
	*/
	keep if sample_firm_inputs==1

*Keep the relevant sectors
	keep if sample_sectors2==1 //should be no change
	keep if naf2d_num != 95 & naf2d_num != 70 //Two other sectors that we want to drop
	

*Some indicator of markup level (quantity)
	* Each flag is 1 when the quantity (FSQ) translog markup exceeds the threshold, 0 otherwise.
	* Stata orders missing values above every number, so "markup > threshold" alone is true for
	* a missing markup; the explicit non-missing test codes those observations 0.
	gen markup_pos = (mu_sepcal_TL_sFSQ_t1 > 0) & !missing(mu_sepcal_TL_sFSQ_t1)

	gen markup_half = (mu_sepcal_TL_sFSQ_t1 > 0.5) & !missing(mu_sepcal_TL_sFSQ_t1)
	
	gen markup_one = (mu_sepcal_TL_sFSQ_t1 > 1) & !missing(mu_sepcal_TL_sFSQ_t1)


*Keep only siren observed more than once
	* number of observations with non-missing catotal for each siren
	by siren, sort: egen count_siren = count(catotal)
	drop if count_siren<=1
	
	
*Construct Sector-Level VA

	** value added summed within each (naf_single, year) cell, and its log
	by naf_single year, sort: egen va_total = total(va) 
	generate l_va_total = ln(va_total)
	
		
	** Save the list of naf_single sectors with fewer than Nperiods non-missing yearly values
	local Nperiods 26
	preserve
		
		*** one row per sector-year (the kept variables are constant within a cell)
		keep naf_single year va_total  l_va_total 
		duplicates drop naf_single year , force 	
		
		*** number of years with a non-missing log VA for each sector
		by naf_single, sort: egen count_naf = count(l_va_total)
		
		*** one row per sector, then keep the incomplete ones only
		duplicates drop naf_single,  force 	
		tabulate naf_single if count_naf<`Nperiods'
		
		keep if count_naf<`Nperiods'
		
		save  BCG/data/incomplete_naf_single_l_va.dta, replace
		
	restore
	
	
	
*Flag sectors with a complete VA series and drop the others
	** hand-written list of sectors to exclude (kept for comparison with the systematic flag)
	capture drop sample_sector_5d
	generate sample_sector_5d = 1
	foreach code in 13.96Z 16.22Z 24.46Z 27.31Z 27.33Z 28.24Z 28.99A 30.12Z 31.09A 33.11Z 33.13Z 33.17Z 33.19Z 33.20B 33.20D 43.11Z 43.21B 46.12A 46.47Z 46.48Z 46.65Z 46.73B {
		replace sample_sector_5d = 0 if naf_single=="`code'" 
	}
	
	** systematic flag: 1 unless the sector is matched in the saved list of incomplete sectors
	merge m:1 naf_single using "BCG/data/incomplete_naf_single_l_va.dta"
	generate sample_sector_5d_v2 = ( _merge!=3 )
	drop _merge count_naf
		
	** cross-check the two flags, then apply the systematic one
	tabulate sample_sector_5d sample_sector_5d_v2
	drop if sample_sector_5d_v2!=1
	
	*List of 5-digit sectors whose VA series restricted to positive-markup firms is incomplete
	local Nperiods 26
	preserve
		
		** value added of positive-markup firms only (missing for the other firms)
		generate va_pos = .
		replace va_pos = va if markup_pos==1
		
		** sector-year total and its log; a cell without any positive-markup firm totals 0,
		** so its log is missing and the year is not counted below
		by naf_single year, sort: egen va_pos_total = total(va_pos) 
		generate l_va_pos_total = ln(va_pos_total)
		
		summarize va_pos va l_va_pos_total l_va_total
		
		** one row per sector-year (the kept variables are constant within a cell)
		keep naf_single year va_pos_total  l_va_pos_total 
		duplicates drop naf_single year , force 	
		
		** number of years with a non-missing log VA for each sector
		by naf_single, sort: egen count_naf = count(l_va_pos_total)
		
		** one row per sector, then keep the incomplete ones only
		duplicates drop naf_single,  force 	
		tabulate naf_single if count_naf<`Nperiods'
		
		keep if count_naf<`Nperiods'
		
		save  BCG/data/incomplete_naf_single_l_va_pos.dta, replace
		
	restore //one sector to drop beyond the one dropped previously
	
*Flag sectors with a complete VA series in the positive-markup sample and drop the others
	** hand-written flag: a single sector to exclude
	capture drop sample_sector_5d_pos
	generate sample_sector_5d_pos = 1
	replace sample_sector_5d_pos = 0 if naf_single=="30.30Z" 
	
	** systematic flag: 1 unless the sector is matched in the saved list of incomplete sectors
	merge m:1 naf_single using "BCG/data/incomplete_naf_single_l_va_pos.dta"
	generate sample_sector_5d_pos_v2 = ( _merge!=3 )
	drop _merge count_naf
		
	** cross-check the two flags, then apply the systematic one
	tabulate sample_sector_5d_pos sample_sector_5d_pos_v2
	drop if sample_sector_5d_pos_v2!=1
	

*Economy-wide value added
	**yearly total over all firms still in the sample, and its log
	by year, sort: egen va_agg = total(va)
	generate l_va_agg = ln(va_agg)
	
		


******************************************************************************************
****************************** 4. Market Share Summary Stats *****************************
******************************************************************************************

*Choose market share

	**Sector level revenue
		bysort naf_single year: egen catotal_total = total(catotal)

	** Compute Market Share	
		* NOTE(review): share5_temp is not used below; marketShare is taken from ms5d
		gen share5_temp = catotal/catotal_total
		

*********** Select positive markup sample
	keep if markup_pos==1 
***********
	
	**cleaning
	* drop leftovers one variable at a time: a single -capture drop- over the whole list drops
	* nothing as soon as one name is absent, and the -gen marketShare- below would then fail
	* if marketShare already existed
	foreach old in marketShare marketShare_mtile Pctile1_MarketShare Pctile0d5_MarketShare Pctile0d1_MarketShare Pctile0d01_MarketShare {
		capture confirm variable `old', exact
		if !_rc drop `old'
	}
		
	** Choose the market share to use
		gen marketShare = ms5d

	**Level of Market Share
		**Summarize market share
******************************************************* Part of the Summary Stats Table *************************************************
		su marketShare, d
******************************************************* Part of the Summary Stats Table *************************************************

		
		**Compute the top percentiles (99, 99.5, 99.9 and 99.99) over the whole sample
		egen Pctile1_MarketShare = pctile(marketShare) ,p(99) 
		egen Pctile0d5_MarketShare = pctile(marketShare) ,p(99.5) 
		egen Pctile0d1_MarketShare = pctile(marketShare) ,p(99.9) 
		egen Pctile0d01_MarketShare = pctile(marketShare) ,p(99.99) 
		
		**Check summary stats on market share
******************************************************* Part of the Summary Stats Table *************************************************
		su marketShare Pctile1_MarketShare Pctile0d5_MarketShare Pctile0d1_MarketShare Pctile0d01_MarketShare
******************************************************* Part of the Summary Stats Table *************************************************
		
		**Number of observation with market share above 57%
		* missing values compare as larger than any number, so they are excluded explicitly
		gen ms_above_57 = (marketShare>=0.57) & !missing(marketShare)
		
		ta ms_above_57

	
	
******************************************************************************************
****************************** 5. Other Summary Stats on exercise sample ******************
******************************************************************************************

*Summary stats for baseline sample

	**Restriction
	keep if markup_pos==1 
	
	**Summary stats on Sales and inputs (sales, wage bill, capital, services, materials)
	* variable labels (the original used -local var-, which only overwrote a macro named var,
	* and named o twice so that m was never covered)
	label var s "sales"
	label var v "wage bill"
	label var k "capital"
	label var o "service"
	label var m "materials"
******************************************************* Part of the Summary Stats Table *************************************************
	su s v k o m, d
******************************************************* Part of the Summary Stats Table *************************************************
	
	**Winsorized 
	* winlevel holds the digits of the tail fraction, e.g. 03 gives p(.03)
	local winlevel $winlevel_gl
	
	winsor  mu_sepcal_TL_sFSQ_t1 , p(.`winlevel') gen(mu_sepcal_TL_sFSQ_t1_w)
	winsor  mu_sepcal_TL_sBFSR_t1 , p(.`winlevel') gen(mu_sepcal_TL_sBFSR_t1_w)
	winsor  mu_sepcal_Lerner_t1, p(.`winlevel') gen(mu_sepcal_Lerner_t1_w)
	
	
	**Summary Stats on markup		
		***Winsorized
		label var mu_sepcal_TL_sFSQ_t1_w "Markup"
******************************************************* Part of the Summary Stats Table *************************************************
		su  mu_sepcal_TL_sFSQ_t1_w, d
******************************************************* Part of the Summary Stats Table *************************************************
		
	
				
			 

******************************************************* Anderson, Rebelo and Wong Table A2 *************************************************				
	**Anderson, Rebelo and Wong Table 13
	* NOTE(review): the two header lines name different tables (A2 and 13) - confirm which applies
	
		**Output elasticity, winsorized for both coefficient sets
			local top_tr $toptr_gl
			local bot_tr $bottr_gl
			local winlevel $winlevel_gl
			
			foreach est in FSQ BFSR {
				winsor  elast_tl_s`est'_t1 , p(.`winlevel') gen(elast_tl_s`est'_t1_w)
			}
			
			label variable elast_tl_sFSQ_t1_w "elasticity quantity"
			label variable elast_tl_sBFSR_t1_w "elasticity revenue"
***************************************************** In Table A2 ****************************************************
			summarize elast_tl_sFSQ_t1_w elast_tl_sBFSR_t1_w, detail
***************************************************** In Table A2 ****************************************************
******************************************************* Part of the Summary Stats Table *************************************************
			summarize elast_tl_sFSQ_t1_w , detail
******************************************************* Part of the Summary Stats Table *************************************************
			
		**Winsorized markups: quantity, Lerner and revenue
		label variable mu_sepcal_TL_sFSQ_t1_w "Quantity Markup"
		label variable mu_sepcal_Lerner_t1_w "Lerner Markup"
		label variable mu_sepcal_TL_sBFSR_t1_w "Revenue Markup"
***************************************************** In Table A2 ****************************************************
			summarize mu_sepcal_TL_sFSQ_t1_w mu_sepcal_Lerner_t1_w mu_sepcal_TL_sBFSR_t1_w, detail
***************************************************** In Table A2 ****************************************************
			
		**Markups relative to the quantity markup (ratios of the unwinsorized series)
			generate mu_TL_sBFSR_sFSQ_ratio= mu_sepcal_TL_sBFSR_t1/mu_sepcal_TL_sFSQ_t1
			generate mu_Lerner_sFSQ_ratio=mu_sepcal_Lerner_t1/mu_sepcal_TL_sFSQ_t1
			
			local winlevel $winlevel_gl			
			foreach rel in mu_TL_sBFSR_sFSQ_ratio mu_Lerner_sFSQ_ratio {
				winsor  `rel' , p(.`winlevel') gen(`rel'_w)
			}
			
			label variable mu_TL_sBFSR_sFSQ_ratio_w "Relative markup revenue wrt Quantity"
			label variable mu_Lerner_sFSQ_ratio_w "Relative markup Lerner wrt Quantity"
***************************************************** In Table A2 ****************************************************
			summarize mu_TL_sBFSR_sFSQ_ratio_w mu_Lerner_sFSQ_ratio_w, detail
***************************************************** In Table A2 ****************************************************
			
			
	
	

**Summary statistics of the sales-to-materials ratio
	** winsorize the ratio at the chosen tail fraction
	local winlevel $winlevel_gl
	winsor ratio, p(.`winlevel') gen(ratio_w)
	
	** detailed statistics of the winsorized ratio

	label variable ratio_w "ratio of sales/materials"
******************************************************* Part of the Summary Stats Table *************************************************
	summarize ratio_w, detail
******************************************************* Part of the Summary Stats Table *************************************************



**Translog output elasticities (FSQ coefficients) and local returns to scale

	***Each elasticity: own coefficient + 2 x own squared-term coefficient x own log input
	***+ cross-term coefficients x the other log inputs
		****materials (same formula as the markup elasticity computed earlier)
		generate elast_m_tl_sFSQ_t1 = beta_m_tl_FSQ + 2*beta_m2_tl_FSQ*m + beta_mv_tl_FSQ*v ///
			+ beta_mo_tl_FSQ*o + beta_mk_tl_FSQ*k

		****capital
		generate elast_k_tl_sFSQ_t1 = beta_k_tl_FSQ + 2*beta_k2_tl_FSQ*k + beta_vk_tl_FSQ*v ///
			+ beta_ok_tl_FSQ*o + beta_mk_tl_FSQ*m

		****labor
		generate elast_v_tl_sFSQ_t1 = beta_v_tl_FSQ + 2*beta_v2_tl_FSQ*v + beta_vk_tl_FSQ*k ///
			+ beta_vo_tl_FSQ*o + beta_mv_tl_FSQ*m

		****services
		generate elast_o_tl_sFSQ_t1 = beta_o_tl_FSQ + 2*beta_o2_tl_FSQ*o + beta_vo_tl_FSQ*v ///
			+ beta_ok_tl_FSQ*k + beta_mo_tl_FSQ*m


****Local returns to scale: sum of the four elasticities
generate LRTS = elast_m_tl_sFSQ_t1 + elast_k_tl_sFSQ_t1 + elast_v_tl_sFSQ_t1 + elast_o_tl_sFSQ_t1

	*****winsorized version
	local winlevel $winlevel_gl
	winsor LRTS, p(.`winlevel') gen(LRTS_w)

label variable LRTS_w "Local Return to Scale"	
******************************************************* Part of the Summary Stats Table *************************************************
	summarize LRTS_w, detail
******************************************************* Part of the Summary Stats Table *************************************************
	
	
******************************************************************************************
****************************** 6. Size-Volatility Relationship****************************
******************************************************************************************

**** Sample restriction ****
	*positive-markup observations only
	keep if markup_pos==1 


* Growth rate of market share

	*panel structure: firm identifier and year
	xtset firmsId year
	

	*year-on-year growth: change in market share over its lagged level
	generate g_marketShare=D.marketShare/L.marketShare

	*log market share
	generate l_marketShare = ln(marketShare)

	
	**** Optional restriction (disabled) ****	
		*drop 1994 once the growth rates are computed
		*keep if year>1994

	

	*winsorized growth rate
	local winlevel $winlevel_gl
	winsor g_marketShare, p(.`winlevel') gen(g_marketShare_w)


	
	
*Size-Volatility relationship (Cross-Section)

	**percentile bin (1 to 100) of market share
	fastxtile marketShare_pct =  marketShare, nquantiles(100)  

	**standard deviation of the winsorized growth rate within each percentile bin
	by marketShare_pct, sort: egen std_g_mShare_F_mShare_pct = sd(g_marketShare_w)


	**observation-level regression of the bin volatility on market share
******************************************************* Part of the Calibration Target *************************************************		
		**** The intercept of this regression is one of our calibration targets *******************
		regress std_g_mShare_F_mShare_pct marketShare
******************************************************* Part of the Calibration Target *************************************************		
		



*Size-Volatility relationship (Time-Series)
		
	** firm-level average market share
		by firmsId, sort: egen mean_marketShare = mean(marketShare)


	** firm-level standard deviation of the winsorized market-share growth rate
		by firmsId, sort: egen sd_g_marketShare_w = sd(g_marketShare_w)
	

	** sector-level average of va_total (numeric sector identifier built from naf_single)
		egen naf_single_num = group(naf_single)
		by naf_single_num, sort: egen va_total_mean = mean(va_total)


	** regression on a firm-level cross-section; the panel is restored afterwards
	preserve
		***one observation per firm (the two firm-level statistics are constant within firm)
			duplicates drop firmsId, force

		***whole sample: firm volatility on firm average size
******************************************************* Part of the Calibration Target *************************************************		
		**** The intercept of this regression is one of our calibration targets *******************
			regress sd_g_marketShare_w mean_marketShare
******************************************************* Part of the Calibration Target *************************************************		
	
			
	restore	 
	 
	 
*******************************************
log close
*******************************************	 
